In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly_express as px
import seaborn as sns
from google.cloud import bigquery
from google.oauth2 import service_account
from prophet import Prophet
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, SeasonalWindowAverage, AutoARIMA
Importando os dados¶
In [3]:
# BigQuery coordinates of the source table: project, dataset and table ids.
# They are interpolated into the fully qualified table name used by the query.
projeto_id, dataset_id, tabela_id = (
    'pos-tech-403001',
    'tech_challenge',
    'raw_petr_brent',
)
In [4]:
# Authenticate against BigQuery with a service-account key file.
# The key location is read from the GOOGLE_APPLICATION_CREDENTIALS environment
# variable so the notebook is not tied to one machine's filesystem; the
# original path is kept as a fallback so existing setups keep working.
caminho_credenciais = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', '/home/chave.json')
credentials = service_account.Credentials.from_service_account_file(caminho_credenciais)
client = bigquery.Client(credentials=credentials, project=projeto_id)
In [5]:
# SQL query selecting every column and row of the source table; the table is
# addressed by its fully qualified `project.dataset.table` name.
consulta_sql = f'SELECT * FROM `{projeto_id}.{dataset_id}.{tabela_id}`'
# Run the query through the authenticated BigQuery client
resultado = client.query(consulta_sql)
# Convert the query result into a pandas DataFrame
df = resultado.to_dataframe()
Tratando os dados¶
In [6]:
# Parse the 'Data' column (day/month/year text) into datetimes, order the rows
# chronologically and rebuild a clean 0..n-1 index, all in one chain.
df = (
    df
    .assign(Data=lambda quadro: pd.to_datetime(quadro['Data'], format='%d/%m/%Y'))
    .sort_values(by='Data', ascending=True)
    .reset_index(drop=True)
)
Análise Exploratória¶
In [7]:
# Line chart of the crude-oil price over time; update_layout returns the same
# figure, so creation and labelling are chained before rendering.
fig = px.line(data_frame=df, x='Data', y='preco_petroleo_bruto').update_layout(
    title='Preço Petróleo bruto',
    xaxis_title='Data',
    yaxis_title='Preço',
)
fig.show()
Prophet¶
In [8]:
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import plot_cross_validation_metric
import warnings
# Ignore every FutureWarning raised from this point on, so cell outputs stay
# readable. NOTE(review): the filter is global, so it also hides deprecation
# notices from the libraries used below — revisit when upgrading dependencies.
warnings.simplefilter(action='ignore', category=FutureWarning)
In [9]:
# Build the two-column frame used for modelling, named 'ds' (date) and 'y'
# (value) as Prophet requires. rename() returns a new frame instead of
# overwriting the labels of a slice of `df`, and it ties each new label to its
# source column by name rather than by position.
df_prophet = (
    df[['Data', 'preco_petroleo_bruto']]
    .rename(columns={'Data': 'ds', 'preco_petroleo_bruto': 'y'})
)
df_prophet.tail()
Out[9]:
| ds | y | |
|---|---|---|
| 11077 | 2024-01-02 | 76.24 |
| 11078 | 2024-01-03 | 77.18 |
| 11079 | 2024-01-04 | 75.79 |
| 11080 | 2024-01-05 | 78.31 |
| 11081 | 2024-01-08 | 75.47 |
In [10]:
def train_test_split_data(dff, start_train, end_train):
    """Split a frame chronologically into a training window and a hold-out tail.

    Parameters
    ----------
    dff : pandas.DataFrame
        Frame with a ``ds`` column that can be compared against the bounds.
    start_train, end_train
        Inclusive lower and upper bounds of the training window, in any form
        comparable with ``dff['ds']`` (e.g. ``'2023-11-23'`` for datetimes).

    Returns
    -------
    tuple
        ``(train, test, future)``: rows with ``start_train <= ds <= end_train``,
        rows with ``ds > end_train``, and ``len(test)``. Rows dated before
        ``start_train`` are in neither part.
    """
    datas = dff['ds']
    dentro_do_treino = (datas >= start_train) & (datas <= end_train)
    depois_do_treino = datas > end_train
    # Return independent copies: callers can add or modify columns without
    # pandas raising SettingWithCopyWarning about a slice of `dff`.
    train = dff.loc[dentro_do_treino].copy()
    test = dff.loc[depois_do_treino].copy()
    future = len(test)
    return train, test, future
In [86]:
def mape(y_true, y_pred):
    """Mean absolute percentage error, in percent, rounded to two decimals.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of identical shape. pandas objects,
        NumPy arrays and plain sequences are accepted; values are compared by
        position, so pandas index labels are ignored.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` rounded to 2 decimals.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.

    Notes
    -----
    The error is divided by ``y_true``, so a zero in ``y_true`` produces an
    infinite or NaN result.
    """
    observado = np.asarray(y_true, dtype=float)
    previsto = np.asarray(y_pred, dtype=float)
    # Without this check NumPy would silently broadcast e.g. a length-1
    # prediction against the whole series and return a misleading score.
    if observado.shape != previsto.shape:
        raise ValueError(
            f'y_true and y_pred must have the same shape, got {observado.shape} and {previsto.shape}'
        )
    r = np.mean(np.abs((observado - previsto) / observado))
    return round(r * 100, 2)
In [133]:
# Train on 1980-06-01..2023-11-23 and hold out every later observation.
train, test, _ = train_test_split_data(df_prophet, '1980-06-01', '2023-11-23')

# 95% uncertainty interval around the forecast.
model = Prophet(interval_width=0.95)
model.fit(train)

# Predict on the dates that actually exist in the data (history + hold-out).
# The series only has rows for trading days, so asking for len(test) *calendar*
# days (freq='D') stops short of the hold-out period: in the recorded run the
# 30 test rows reach 2024-01-08, while 30 calendar days end on 2023-12-23.
# Using the observed dates keeps `forecast` row-aligned with train + test.
fut = pd.concat([train[['ds']], test[['ds']]], ignore_index=True)
forecast = model.predict(fut)

print(f'Tamanho da base de treino:{train.shape}')
print(f'Tamanho da base de teste:{test.shape}')
plot_plotly(model, forecast)
20:08:34 - cmdstanpy - INFO - Chain [1] start processing 20:08:38 - cmdstanpy - INFO - Chain [1] done processing
Tamanho da base de treino:(11052, 2) Tamanho da base de teste:(30, 2)
In [135]:
# Interactive plot of the forecast's components for the fitted model
# (presumably trend and seasonalities — confirm against the rendered figure).
plot_components_plotly(model, forecast)